AMP Phase II CITE seq data - subset Tregs¶

In [1]:
suppressPackageStartupMessages({
    library(Seurat)
    library(ggplot2)
    library(dplyr)
    library(ggthemes)
    library(uwot)
    library(symphony)
    library(pheatmap)
    library(harmony)
    library(readxl)
    library(presto)
    library(anndata)
    library(singlecellmethods)
    library(rlang)
})

fig.size <- function(h, w) {
    options(repr.plot.height = h, repr.plot.width = w)
}
In [2]:
data.path <- "/data/brennerlab/AMP_Phase_2/Data/Processed_single_cell_data/processed_output_04-11-2023/"
T.metadata.p <- paste0(data.path, "T_reference.rds")
T.uwot.p <- paste0(data.path, "T_uwot_model")
all.qc.p <- paste0(data.path, "qc_mRNA_314011cells_log_normalized_matrix.rds")
all.qc.prot.p <- paste0(data.path, "qc_protein_CLR_normalized_filtered_matrix.rds")
all.raw.counts <- paste0(data.path, "raw_mRNA_count_matrix.rds")
all.raw.counts.prot <- paste0(data.path, "raw_protein_count_matrix.rds")
CTAP.path <- paste0(data.path, "CTAP_donor_mapping.xlsx")
donor.meta.data <- "/data/brennerlab/Shani/projects/AMP_Phase_2/treatment assigned metadata_clin_donor_singlecell.csv"
In [4]:
set.seed(1)

0. Load T cells data¶

In [5]:
amp_ref <- readRDS(T.metadata.p)
amp_meta <- amp_ref$meta_data
In [6]:
str(amp_ref, 1)
List of 11
 $ meta_data     :'data.frame':	94046 obs. of  13 variables:
 $ vargenes      : tibble [5,179 × 3] (S3: tbl_df/tbl/data.frame)
 $ loadings      : num [1:5179, 1:20] 0.013549 0.001822 -0.000413 0.003793 -0.003837 ...
  ..- attr(*, "dimnames")=List of 2
 $ R             : num [1:100, 1:94046] 3.75e-09 1.57e-07 1.54e-06 8.96e-06 3.88e-09 ...
 $ Z_orig        : num [1:20, 1:94046] -0.48237 -1.13399 0.85466 0.00182 -0.08951 ...
 $ Z_corr        : num [1:20, 1:94046] -0.689 -0.769 0.98 -0.504 0.207 ...
 $ betas         : num [1:82, 1:20, 1:100] 1.084319 0.318315 -0.010745 -0.007796 0.000507 ...
 $ centroids     : num [1:20, 1:100] 0.372 -0.101 -0.162 0.32 -0.188 ...
 $ cache         :List of 2
 $ umap          :List of 1
 $ save_uwot_path: chr "/data/srlab2/anathan/AMP/data/2020.11.25_Phase2RA_Tcells_cca_400_raw_postHarmony_Tfilter2_refumap.rds"
In [7]:
amp_ref %>% summary
               Length  Class      Mode     
meta_data           13 data.frame list     
vargenes             3 tbl_df     list     
loadings        103580 -none-     numeric  
R              9404600 -none-     numeric  
Z_orig         1880920 -none-     numeric  
Z_corr         1880920 -none-     numeric  
betas           164000 -none-     numeric  
centroids         2000 -none-     numeric  
cache                2 -none-     list     
umap                 1 -none-     list     
save_uwot_path       1 -none-     character
In [8]:
fig.size(12,10)
amp_meta %>% with(table(cluster_name)) %>% as.data.frame() %>% 
    ggplot() + 
        geom_col(aes(Freq, cluster_name)) + theme_bw(base_size = 24)
In [9]:
amp_meta$cluster_name%>% table()
.
      T-0: CD4+ IL7R+ memory      T-1: CD4+ CD161+ memory 
                        9902                         8520 
      T-10: CD4+ OX40+NR3C1+     T-11: CD4+ CD146+ memory 
                        2553                         1867 
            T-12: CD4+ GNLY+    T-13: CD8+ GZMK/B+ memory 
                        1604                         8693 
     T-14: CD8+ GZMK+ memory       T-15: CD8+ GZMB+/TEMRA 
                        6870                         4723 
  T-16: CD8+ CD45ROlow/naive T-17: CD8+ activated/NK-like 
                        4554                          692 
         T-18: Proliferating  T-19: MT-high (low quality) 
                        1952                         1702 
 T-2: CD4+ IL7R+CCR5+ memory                  T-20: CD38+ 
                        6595                          748 
           T-21: Innate-like                T-22: Vdelta1 
                        1473                         3373 
               T-23: Vdelta2            T-3: CD4+ Tfh/Tph 
                        1161                         6118 
             T-4: CD4+ naive       T-5: CD4+ GZMK+ memory 
                        4947                         4485 
            T-6: CD4+ memory                T-7: CD4+ Tph 
                        3167                         2859 
    T-8: CD4+ CD25-high Treg      T-9: CD4+ CD25-low Treg 
                        2841                         2647 
In [10]:
2841+2647
5488
In [11]:
amp_ref$save_uwot_path <- T.uwot.p
In [12]:
amp_ref$meta_data%>% head
A data.frame: 6 × 13
cellsampleclustercell_typedonorfibroTfilternUMInGenepercent_mitoTfilter2cluster_numbercluster_name
<chr><chr><chr><chr><chr><lgl><lgl><dbl><int><dbl><lgl><chr><chr>
201003BRI-399_AAACGAATCTGCATGABRI-399T-6: CD4+ memory T cellBRI-399TRUEFALSE756021570.06521164TRUET-6 T-6: CD4+ memory
301006BRI-399_AAACGCTTCCTTGACCBRI-399T-5: CD4+ GZMK+ memory T cellBRI-399TRUEFALSE256311710.16269996TRUET-5 T-5: CD4+ GZMK+ memory
401005BRI-399_AAAGGGCAGCCGGAATBRI-399T-8: CD4+ CD25-high TregT cellBRI-399TRUEFALSE492217410.07334417TRUET-8 T-8: CD4+ CD25-high Treg
431002BRI-399_AAAGGGCCACTATGTGBRI-399T-22: Vdelta1 T cellBRI-399TRUEFALSE396816190.06880040TRUET-22T-22: Vdelta1
531002BRI-399_AAAGGTAGTGCAGGATBRI-399T-8: CD4+ CD25-high TregT cellBRI-399TRUEFALSE410114920.09022190TRUET-8 T-8: CD4+ CD25-high Treg
691002BRI-399_AAAGTGAGTTGTGGCCBRI-399T-14: CD8+ GZMK+ memory T cellBRI-399TRUEFALSE339014170.02300885TRUET-14T-14: CD8+ GZMK+ memory

0.1. Add counts data¶

In [13]:
# load counts and qc counts
amp_counts <- readRDS(all.raw.counts)
In [14]:
dim(amp_counts)
dim(amp_meta)
  1. 33538
  2. 576600
  1. 94046
  2. 13
In [15]:
#filter counts to to include T cells only by metadata
cells <- amp_meta$cell
amp_counts <- amp_counts[,cells]

0.2. Add protein data¶

In [16]:
# load counts and qc counts
protein_counts <- readRDS(all.raw.counts.prot)
In [17]:
dim(protein_counts)
  1. 58
  2. 576600
In [18]:
#filter counts to to include T cells only by metadata
protein_counts <- protein_counts[,cells]

0.3. Create Seurat obj¶

In [19]:
meta <- amp_meta %>% tibble::remove_rownames()%>% tibble::column_to_rownames(var = "cell")
amp <- CreateSeuratObject(counts = amp_counts, meta.data = meta, project = "AMPII")
amp[["ADT"]] <- CreateAssayObject(counts = protein_counts)
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
In [20]:
donor.meta <- read.csv(donor.meta.data)
merged.meta <- amp@meta.data%>% left_join(donor.meta, by =join_by("orig.ident" == "mRNA_run"))
head(merged.meta, 3)
A data.frame: 3 × 32
orig.identnCount_RNAnFeature_RNAsampleclustercell_typedonorfibroTfilternUMI⋯sexageCDAIdisease_durationtissue_typekrenn_inflammationsingle_cell_techcell_count_to_10xprotein_runatac_run
<chr><dbl><int><chr><chr><chr><chr><lgl><lgl><dbl>⋯<chr><int><chr><dbl><chr><dbl><chr><chr><chr><chr>
1BRI-39975602157BRI-399T-6: CD4+ memory T cellBRI-399TRUEFALSE7560⋯Female70NAArthroplastyNACITEseq + flow/bulk + re-freeze15,000BRI-400BRI-448
2BRI-39925631171BRI-399T-5: CD4+ GZMK+ memory T cellBRI-399TRUEFALSE2563⋯Female70NAArthroplastyNACITEseq + flow/bulk + re-freeze15,000BRI-400BRI-448
3BRI-39949221741BRI-399T-8: CD4+ CD25-high TregT cellBRI-399TRUEFALSE4922⋯Female70NAArthroplastyNACITEseq + flow/bulk + re-freeze15,000BRI-400BRI-448
In [21]:
merged.meta$subject_id%>% head
  1. '301-0267'
  2. '301-0267'
  3. '301-0267'
  4. '301-0267'
  5. '301-0267'
  6. '301-0267'
In [22]:
colnames(donor.meta)
colnames(amp@meta.data)
  1. 'subject_id'
  2. 'pipeline_date'
  3. 'site'
  4. 'treatment'
  5. 'biopsied'
  6. 'sex'
  7. 'age'
  8. 'CDAI'
  9. 'disease_duration'
  10. 'tissue_type'
  11. 'krenn_inflammation'
  12. 'single_cell_tech'
  13. 'cell_count_to_10x'
  14. 'mRNA_run'
  15. 'protein_run'
  16. 'atac_run'
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'sample'
  5. 'cluster'
  6. 'cell_type'
  7. 'donor'
  8. 'fibro'
  9. 'Tfilter'
  10. 'nUMI'
  11. 'nGene'
  12. 'percent_mito'
  13. 'Tfilter2'
  14. 'cluster_number'
  15. 'cluster_name'
  16. 'nCount_ADT'
  17. 'nFeature_ADT'
In [23]:
#Add metadata
amp <- AddMetaData(amp, merged.meta$age, "age")
amp <- AddMetaData(amp, merged.meta$sex, "sex")
amp <- AddMetaData(amp, merged.meta$treatment, "treatment")
amp <- AddMetaData(amp, merged.meta$CDAI, "CDAI")
amp <- AddMetaData(amp, merged.meta$disease_duration, "disease.duration")
amp <- AddMetaData(amp, merged.meta$tissue_type, "tissue.type")
amp <- AddMetaData(amp, merged.meta$krenn_inflammation, "krenn")
amp <- AddMetaData(amp, merged.meta$subject_id, "subject_id")
In [24]:
# Add CTAP info
ctap.file <- read_xlsx(CTAP.path)
ctap.file%>% head
A tibble: 6 × 3
subject_iddonorCTAP
<chr><chr><chr>
300-0310BRI-405E + F + M
300-0309BRI-411E + F + M
300-0174BRI-479E + F + M
300-0175BRI-525E + F + M
300-0529BRI-554E + F + M
300-0145BRI-589E + F + M
In [25]:
donorID <- amp@meta.data%>% select(orig.ident)%>% tibble::rownames_to_column(., "cellID")%>% 
            left_join(ctap.file, by = join_by(orig.ident == donor))%>% 
            tibble::column_to_rownames(., "cellID") %>% .[rownames(amp@meta.data),]
amp <- AddMetaData(amp, donorID$CTAP, "CTAP")
In [26]:
fig.size(5,7)
sum.donor <- amp@meta.data %>% select(donor, CTAP, treatment, sex) %>% distinct()
pt <- table(sum.donor$CTAP[sum.donor$treatment != "OA"], sum.donor$sex[sum.donor$treatment != "OA"])
pt
pt %>%  data.frame()%>% setNames(c("CTAP", "SEX", "Freq")) %>% mutate(per = Freq / ifelse(SEX == "Female", sum(Freq[SEX == "Female"]), sum(Freq[SEX == "Male"])))%>% 
    ggplot(aes(x = CTAP, y = per, fill = SEX)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack")
pl + geom_col(position = "fill")
           
            Female Male
  E + F + M      6    1
  F              7    4
  M             13    5
  T + B         11    3
  T + F          6    2
  T + M          9    3
In [27]:
fig.size(5,20)
VlnPlot(amp, features = "nFeature_RNA", group.by = "cluster", pt.size = 0)
VlnPlot(amp, features = "nCount_RNA", group.by = "cluster", pt.size = 0)
VlnPlot(amp, features = "percent_mito", group.by = "cluster", pt.size = 0)
In [ ]:

0.4. Preprocesing¶

In [27]:
#RNA
amp <- amp %>% 
        NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
        ScaleData()%>% 
        FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
        RunPCA(verbose = F)
        # RunBalancedPCA(weight.by='orig.ident')

#protein
amp <- amp %>% 
        NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") %>% 
        ScaleData(assay = "ADT")
Centering and scaling data matrix

Normalizing across cells

Centering and scaling data matrix

In [28]:
# add variable gene list
# amp@assays$RNA@var.features <- amp_ref$vargenes$symbol
In [29]:
amp@assays$ADT%>% rownames
  1. 'CD107a/LAMP1-prot'
  2. 'CD112/Nectin-2-prot'
  3. 'CD119/IFN-gamma-R-alpha-chain-prot'
  4. 'CD11b-prot'
  5. 'CD11c-prot'
  6. 'CD127/IL-7R-alpha-prot'
  7. 'CD140a/PDGFR-alpha-prot'
  8. 'CD141/Thrombomodulin-prot'
  9. 'CD144/VE-Cadherin-prot'
  10. 'CD146-prot'
  11. 'CD14-prot'
  12. 'CD155/PVR-prot'
  13. 'CD161-prot'
  14. 'CD163-prot'
  15. 'CD16-prot'
  16. 'CD192/CCR2-prot'
  17. 'CD195/CCR5-prot'
  18. 'CD196/CCR6-prot'
  19. 'CD19-prot'
  20. 'CD1c-prot'
  21. 'CD206/MMR-prot'
  22. 'CD209/DC-SIGN-prot'
  23. 'CD20-prot'
  24. 'CD21-prot'
  25. 'CD226/DNAM-1(11A8)-prot'
  26. 'CD24-prot'
  27. 'CD273/B7-DC/PD-L2-prot'
  28. 'CD274/B7-H1/PD-L1-prot'
  29. 'CD278/ICOS-prot'
  30. 'CD279/PD-1-prot'
  31. 'CD27(LG.3A10)-prot'
  32. 'CD304/Neuropilin-1-prot'
  33. 'CD309/VEGFR2-prot'
  34. 'CD314/NKG2D-prot'
  35. 'CD31-prot'
  36. 'CD34-prot'
  37. 'CD38(HIT2)-prot'
  38. 'CD3-prot'
  39. 'CD44-prot'
  40. 'CD45(2D1)-prot'
  41. 'CD45RA-prot'
  42. 'CD45RO-prot'
  43. 'CD4-prot'
  44. 'CD55-prot'
  45. 'CD56/NCAM-prot'
  46. 'CD64-prot'
  47. 'CD68-prot'
  48. 'CD69-prot'
  49. 'CD86-prot'
  50. 'CD8a-prot'
  51. 'CD90/THY1-prot'
  52. 'CX3CR1-prot'
  53. 'EGFR-prot'
  54. 'FR-beta-prot'
  55. 'HLA-DR-prot'
  56. 'IgG-Fc-prot'
  57. 'IgM-prot'
  58. 'Podoplanin-prot'
In [30]:
a <- amp_ref$umap$embedding
colnames(a) <- c("UMAP_1", "UMAP_2")
rownames(a) <-amp_ref$meta_data$cell
a%>%  head
A matrix: 6 × 2 of type dbl
UMAP_1UMAP_2
BRI-399_AAACGAATCTGCATGA-0.7474533-0.7355343
BRI-399_AAACGCTTCCTTGACC 2.6602657 0.7497368
BRI-399_AAAGGGCAGCCGGAAT-2.1143181 2.0444754
BRI-399_AAAGGGCCACTATGTG 5.5098784 1.5855038
BRI-399_AAAGGTAGTGCAGGAT-1.5639387 2.5774951
BRI-399_AAAGTGAGTTGTGGCC 4.2999895-0.9138112
In [31]:
amp@reductions$umap <- CreateDimReducObject(embeddings = a, key = "UMAP_", assay= "RNA")
In [10]:
fig.size(8,14)
DimPlot(amp, reduction = "umap", group.by = "cluster")
In [33]:
fig.size(5,18)
FeaturePlot(amp, features = c("FOXP3","IL2RA","IL7R", "MKI67"), ncol = 4)
In [81]:
Idents(amp) <- "cluster_number"
IL2RA <- wilcoxauc(amp, groups_use = c("T-8", "T-9"))
In [82]:
m.show <- IL2RA %>%
    group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature)
IL2RA %>% write.csv("../../analysis/AMP2_RA_tissue/Treg.DEG.IL2RA.csv")
m.show %>% write.csv("../../analysis/AMP2_RA_tissue/Treg.DEG.IL2RA.Passed.csv")
m.show[1:30,]
A tibble: 30 × 3
rowT-8T-9
<int><chr><chr>
1IL32 JUNB
2HLA-DRB1 RPS12
3CTSC RPS6
4TNFRSF18 RPS3A
5LGALS1 RPL32
6CD74 EEF1B2
7CYTOR RPLP2
8S100A4 RPS13
9RNF213 RPS14
10TNFRSF4 FOS
11LGALS3 RPS8
12HLA-DPB1 RPL30
13MAF RPS18
14MYL6 RPL9
15NEAT1 RPL13
16CTLA4 RPS20
17TNFRSF1B ZFP36L2
18HLA-DPA1 RPL3
19S100A6 RPL39
20MIR4435-2HGRPL34
21TYMP RPL35A
22CD2 RPS23
23ARPC1B RPL22
24ENO1 MAL
25HLA-A RPL38
26FOXP3 RPL5
27IL2RA RPS29
28UCP2 RPS5
29FAM129A RPS15A
30GLRX RPS27A

0.5. Save¶

In [34]:
# library(anndata)
# ad <- AnnData(
#   X = t(amp@assays$RNA@counts),
#   obs = amp@meta.data,
#   var = data.frame(row.names = rownames(amp))
# )

# write_h5ad(ad, "../../analysis/AMP2_RA_tissue/AMP.allTcells.h5ad")
saveRDS(amp, "/data/brennerlab/Shani/projects/AMP_Phase_2/AMP2_Tcells_Seurat.rds")
In [3]:
amp <- readRDS("/data/brennerlab/Shani/projects/AMP_Phase_2/AMP2_Tcells_Seurat.rds")
In [3]:
fig.size(8,10)
ampdat <- amp@reductions$umap@cell.embeddings %>% data.frame%>% mutate(cluster = amp$cluster) %>% 
mutate(highlight = if_else(grepl("Treg", cluster), cluster, "Other T"))

ggplot(ampdat, aes(x=UMAP_1, y = UMAP_2, color = highlight)) + geom_point(size =0.5) + 
scale_color_manual(values = c("grey", "#43A5BE", "#F07857")) + theme_bw(base_size = 20)+ coord_fixed() 

fig.size(8,20)
ggplot(ampdat, aes(x=UMAP_1, y = UMAP_2, color = cluster)) + geom_point(size =0.5) + coord_fixed() +
theme_bw(base_size = 20)
# data.frame("umap1" = amp@reductions$umap$UMAP_1, "umap2" = amp@reductions$umap$UMAP_2, "cluster" = amp$cluster)
Error in data.frame(.): object 'amp' not found
Traceback:

1. mutate(., highlight = if_else(grepl("Treg", cluster), cluster, 
 .     "Other T"))
2. mutate(., cluster = amp$cluster)
3. data.frame(.)
4. .handleSimpleError(function (cnd) 
 . {
 .     watcher$capture_plot_and_output()
 .     cnd <- sanitize_call(cnd)
 .     watcher$push(cnd)
 .     switch(on_error, continue = invokeRestart("eval_continue"), 
 .         stop = invokeRestart("eval_stop"), error = invokeRestart("eval_error", 
 .             cnd))
 . }, "object 'amp' not found", base::quote(data.frame(.)))
In [35]:
fig.size(5,6)
FeaturePlot(amp, features = c("FOXP3"))

1. Strategy 1 - Use AMP Treg annotations¶

1.1. Explore Treg proportions per donor in AMP¶

In [28]:
exp.dat <- amp@meta.data[, c("cluster_number", "donor")]%>% mutate(Treg = ifelse(cluster_number %in% c("T-8", "T-9"), "Treg", "Other T"))%>% 
    group_by(donor,Treg)%>% summarize(n = n())%>% ungroup(donor, Treg)
# exp.dat
fig.size(5,10)
p <- ggplot(exp.dat, aes(x=donor, y=n, fill=Treg)) + theme_bw(base_size = 20) + theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p + geom_bar(stat="identity")
p <- p + geom_bar(stat="identity", position = "fill")
p
# p + geom_hline(yintercept=0.01)
`summarise()` has grouped output by 'donor'. You can override using the
`.groups` argument.
In [47]:
unique(amp$cluster[grepl("CD4\\+", amp$cluster)])
  1. 'T-6: CD4+ memory'
  2. 'T-5: CD4+ GZMK+ memory'
  3. 'T-8: CD4+ CD25-high Treg'
  4. 'T-2: CD4+ IL7R+CCR5+ memory'
  5. 'T-12: CD4+ GNLY+'
  6. 'T-4: CD4+ naive'
  7. 'T-0: CD4+ IL7R+ memory'
  8. 'T-3: CD4+ Tfh/Tph'
  9. 'T-1: CD4+ CD161+ memory'
  10. 'T-10: CD4+ OX40+NR3C1+'
  11. 'T-9: CD4+ CD25-low Treg'
  12. 'T-11: CD4+ CD146+ memory'
  13. 'T-7: CD4+ Tph'
In [49]:
exp.dat <- amp@meta.data[, c("cluster","donor")]%>% filter(grepl("CD4\\+", cluster))%>% 
    mutate(Treg = ifelse(grepl("Treg", cluster), "Treg", "Other CD4+T"))%>% 
    group_by(donor,Treg) %>% summarize(n = n())%>% ungroup(donor, Treg)

# exp.dat
fig.size(5,10)
p <- ggplot(exp.dat, aes(x=donor, y=n, fill=Treg)) + theme_bw(base_size = 20) + theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p + geom_bar(stat="identity")
p <- p + geom_bar(stat="identity", position = "fill")
p
# p + geom_hline(yintercept=0.01)
`summarise()` has grouped output by 'donor'. You can override using the
`.groups` argument.
In [53]:
exp.dat %>% group_by(donor, Treg) %>% summarise(n = sum(n)) %>% ungroup() %>% 
group_by(donor) %>% mutate(total = sum(n)) %>% group_by(donor, Treg) %>%  mutate(prop = n/total) %>% 
filter(Treg == "Treg") %>% ungroup() %>% with(prop) -> Treg.prop

round(min(Treg.prop)*100,1)
round(max(Treg.prop)*100,1)
round(median(Treg.prop)*100, 1)
round(mean(Treg.prop)*100,1)
round(IQR(Treg.prop)*100,1)
round(sd(Treg.prop)*100,1)

hist(Treg.prop)
`summarise()` has grouped output by 'donor'. You can override using the
`.groups` argument.
3.3
24.4
9
10.2
5.8
4.6
In [29]:
exp.dat <- amp@meta.data[, c("cluster_number", "donor", "sex")]%>% 
    mutate(Treg = ifelse(cluster_number %in% c("T-8", "T-9"), "Treg", "Other T"))%>% 
    group_by(donor,Treg, sex)%>% summarize(n = n())%>% ungroup()
p <- ggplot(exp.dat, aes(x=donor, y=n, fill=Treg)) + theme_bw(base_size = 20) + facet_wrap(~sex) + theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p + geom_bar(stat="identity")
p <- p + geom_bar(stat="identity", position = "fill")
p
`summarise()` has grouped output by 'donor', 'Treg'. You can override using the
`.groups` argument.
In [30]:
exp.dat.f <- amp@meta.data[, c("cluster_number", "donor")]%>% mutate(Treg = ifelse(cluster_number %in% c("T-8", "T-9"), "Treg", "Other T"))%>% 
    group_by(donor,Treg)%>% summarize(n = n())%>% ungroup(donor, Treg)%>% filter(Treg == "Treg") 
p <- ggplot(exp.dat.f, aes(x=reorder(donor, -n), y=n,  fill=Treg)) + geom_bar(stat="identity") + theme_bw(base_size = 20)+
theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p
min.Tregs <- 20
c <- exp.dat.f%>% filter(n<min.Tregs)%>% count()%>% pull()
remained <- exp.dat.f%>% filter(n>=min.Tregs)%>% summarise(sum(n))%>% pull()
removed <- exp.dat.f %>% summarise(sum(n))%>% pull() - remained

                                                     
p + geom_hline(yintercept=min.Tregs) + 
annotate("text", x = length(exp.dat.f$donor) - c/2, y = min.Tregs+10, label = paste0("Removing ", c, " donors with <", min.Tregs, " Tregs"), size = 6) +
annotate("text", x = 15, y = 255, label = paste0("Remaining ", remained, " Tregs"), size = 6) + 
annotate("text", x = 65, y = 255, label = paste0("Removing ", removed, " Tregs"), size = 6) 

min.Tregs <- 50
c <- exp.dat.f%>% filter(n<min.Tregs)%>% count()%>% pull()
remained <- exp.dat.f%>% filter(n>=min.Tregs)%>% summarise(sum(n))%>% pull()
removed <- exp.dat.f %>% summarise(sum(n))%>% pull() - remained
p + geom_hline(yintercept=min.Tregs) + 
annotate("text", x = length(exp.dat.f$donor) - c/2, y = min.Tregs+10, label = paste0("Removing ", c, " donors with <", min.Tregs, " Tregs"), size = 6) +
annotate("text", x = 15, y = 255, label = paste0("Remaining ", remained, " Tregs"), size = 6) + 
annotate("text", x = 65, y = 255, label = paste0("Removing ", removed, " Tregs"), size = 6)
`summarise()` has grouped output by 'donor'. You can override using the
`.groups` argument.
In [38]:
amp
An object of class Seurat 
33596 features across 94046 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 2 dimensional reductions calculated: pca, umap
In [39]:
amp$cluster %>% unique
  1. 'T-6: CD4+ memory'
  2. 'T-5: CD4+ GZMK+ memory'
  3. 'T-8: CD4+ CD25-high Treg'
  4. 'T-22: Vdelta1'
  5. 'T-14: CD8+ GZMK+ memory'
  6. 'T-2: CD4+ IL7R+CCR5+ memory'
  7. 'T-12: CD4+ GNLY+'
  8. 'T-4: CD4+ naive'
  9. 'T-0: CD4+ IL7R+ memory'
  10. 'T-3: CD4+ Tfh/Tph'
  11. 'T-1: CD4+ CD161+ memory'
  12. 'T-16: CD8+ CD45ROlow/naive'
  13. 'T-18: Proliferating'
  14. 'T-20: CD38+'
  15. 'T-13: CD8+ GZMK/B+ memory'
  16. 'T-21: Innate-like'
  17. 'T-19: MT-high (low quality)'
  18. 'T-10: CD4+ OX40+NR3C1+'
  19. 'T-9: CD4+ CD25-low Treg'
  20. 'T-15: CD8+ GZMB+/TEMRA'
  21. 'T-11: CD4+ CD146+ memory'
  22. 'T-7: CD4+ Tph'
  23. 'T-23: Vdelta2'
  24. 'T-17: CD8+ activated/NK-like'
In [12]:
fig.size(15, 15)
VlnPlot(amp, features = c("FOXP3", "IL2RA", "AREG"), group.by = "cluster", ncol = 1)
VlnPlot(amp, features = c("FOXP3", "IL2RA", "AREG"), group.by = "cluster", ncol = 1, pt.size = 0.0000001)

1.2. Subset Tregs¶

In [41]:
Treg.names <- c("T-8: CD4+ CD25-high Treg", "T-9: CD4+ CD25-low Treg", "T-18: Proliferating")
Idents(amp) <- "cluster"
treg.amp <- subset(amp, idents= Treg.names)
treg.amp
An object of class Seurat 
33596 features across 7440 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 2 dimensional reductions calculated: pca, umap
In [42]:
#move amp UMAP to metadata 
treg.amp <-AddMetaData(treg.amp, treg.amp@reductions$umap@cell.embeddings[,1], "UMAP_1.amp")
treg.amp <-AddMetaData(treg.amp, treg.amp@reductions$umap@cell.embeddings[,2], "UMAP_2.amp")

1.2.1. Explore Tregs alone¶

In [43]:
fig.size(6,8)
DimPlot(treg.amp)

1.2.2. Redo normalization, feature selection and scaling¶

In [44]:
#RNA
treg.amp <- treg.amp %>% 
        NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
        ScaleData()%>% 
        FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
        RunPCA(verbose = F)
        # RunBalancedPCA(weight.by='orig.ident')

#protein
treg.amp <- treg.amp %>% 
        NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") %>% 
        ScaleData(assay = "ADT")
Centering and scaling data matrix

Normalizing across cells

Centering and scaling data matrix

In [ ]:
fig.size(5,15)
FeaturePlot(treg.amp, features = c("FOXP3", "IL2RA", "AREG", MK), ncol=3, order = T, pt.size = 0.0005)
FeaturePlot(treg.amp, features = c("FOXP3", "IL2RA", "AREG"), ncol=3, order = F, pt.size = 0.0005)
In [ ]:
fig.size(5, 12)
DimPlot(treg.amp, reduction = "pca", group.by = "donor")
In [47]:
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = T)

fig.size(5,5)

ElbowPlot(treg.amp, ndims = 50, reduction = "harmony") 
ElbowPlot(treg.amp, ndims = 50, reduction = "pca")
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony 5/10

Harmony 6/10

Harmony 7/10

Harmony 8/10

Harmony 9/10

Harmony 10/10

Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
In [ ]:
fig.size(5, 12)
DimPlot(treg.amp, reduction = "harmony", group.by = "donor")
In [52]:
# dims_use = 1:20
# treg.amp <- treg.amp %>% 
#             RunUMAP(reduction="harmony", dims=dims_use, verbose=FALSE) %>% 
#             FindNeighbors(reduction="harmony", dims=dims_use, verbose=FALSE) %>% 
#             FindClusters(resolution=0.5, verbose=FALSE)

# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3, spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
    embeddings = HU$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph

treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
                                  resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7440
Number of edges: 88413

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7769
Number of communities: 13
Elapsed time: 0 seconds
In [53]:
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(5,6)
red.use <- "humap"
DimPlot(treg.amp, reduction = red.use, label = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
In [54]:
fig.size(10,12)
FeaturePlot(treg.amp, reduction = red.use, c("FOXP3", "IL2RA", "AREG", "CXCR6", "CD4", "MKI67"))
FeaturePlot(treg.amp, reduction = red.use, c("FOXP3", "IL2RA", "AREG", "CXCR6", "CD4", "MKI67"), order = T)
In [55]:
fig.size(5,10)
VlnPlot(treg.amp, features = "FOXP3")
VlnPlot(treg.amp, features = "AREG")
VlnPlot(treg.amp, features = "CD4")
VlnPlot(treg.amp, features = "MKI67")
VlnPlot(treg.amp, features = "IL2RA")
In [56]:
unique(rownames(amp@assays$ADT))
  1. 'CD107a/LAMP1-prot'
  2. 'CD112/Nectin-2-prot'
  3. 'CD119/IFN-gamma-R-alpha-chain-prot'
  4. 'CD11b-prot'
  5. 'CD11c-prot'
  6. 'CD127/IL-7R-alpha-prot'
  7. 'CD140a/PDGFR-alpha-prot'
  8. 'CD141/Thrombomodulin-prot'
  9. 'CD144/VE-Cadherin-prot'
  10. 'CD146-prot'
  11. 'CD14-prot'
  12. 'CD155/PVR-prot'
  13. 'CD161-prot'
  14. 'CD163-prot'
  15. 'CD16-prot'
  16. 'CD192/CCR2-prot'
  17. 'CD195/CCR5-prot'
  18. 'CD196/CCR6-prot'
  19. 'CD19-prot'
  20. 'CD1c-prot'
  21. 'CD206/MMR-prot'
  22. 'CD209/DC-SIGN-prot'
  23. 'CD20-prot'
  24. 'CD21-prot'
  25. 'CD226/DNAM-1(11A8)-prot'
  26. 'CD24-prot'
  27. 'CD273/B7-DC/PD-L2-prot'
  28. 'CD274/B7-H1/PD-L1-prot'
  29. 'CD278/ICOS-prot'
  30. 'CD279/PD-1-prot'
  31. 'CD27(LG.3A10)-prot'
  32. 'CD304/Neuropilin-1-prot'
  33. 'CD309/VEGFR2-prot'
  34. 'CD314/NKG2D-prot'
  35. 'CD31-prot'
  36. 'CD34-prot'
  37. 'CD38(HIT2)-prot'
  38. 'CD3-prot'
  39. 'CD44-prot'
  40. 'CD45(2D1)-prot'
  41. 'CD45RA-prot'
  42. 'CD45RO-prot'
  43. 'CD4-prot'
  44. 'CD55-prot'
  45. 'CD56/NCAM-prot'
  46. 'CD64-prot'
  47. 'CD68-prot'
  48. 'CD69-prot'
  49. 'CD86-prot'
  50. 'CD8a-prot'
  51. 'CD90/THY1-prot'
  52. 'CX3CR1-prot'
  53. 'EGFR-prot'
  54. 'FR-beta-prot'
  55. 'HLA-DR-prot'
  56. 'IgG-Fc-prot'
  57. 'IgM-prot'
  58. 'Podoplanin-prot'
In [57]:
fig.size(5,10)
FeaturePlot(treg.amp, reduction = "humap", features = c("CD45RA-prot", "CD45RO-prot"))
FeaturePlot(treg.amp, reduction = "humap", features = c("CCR7", "SELL"))

FetchData(treg.amp, vars = c("CD45RA-prot", "CD45RO-prot", "cluster"), assay = "ADT", slot = "data") -> dat
colnames(dat) <- c("CD45RA", "CD45RO", "cluster")

fig.size(8,10)
dat %>% ggplot(aes(x=CD45RA, y = CD45RO, col = cluster)) + geom_point() + geom_smooth(method = "lm") + theme_bw(base_size = 20) #+ facet_wrap(~cluster)
Warning message:
“Could not find CD45RA-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD45RO-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD45RA-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD45RO-prot in the default search locations, found in ADT assay instead”
`geom_smooth()` using formula = 'y ~ x'
In [58]:
fig.size(5,10)
# RA - naive ; RO - memory
VlnPlot(treg.amp, features = c("CD45RA-prot", "CD45RO-prot"), same.y.lims = T)
Warning message:
“Could not find CD45RA-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD45RO-prot in the default search locations, found in ADT assay instead”
Warning message:
“Removed 1 rows containing non-finite values (`stat_ydensity()`).”
Warning message:
“Removed 1 rows containing missing values (`geom_point()`).”
In [59]:
#number of cells per cluster
p.cells.foxp3 <- FetchData(treg.amp, vars= c("FOXP3", "seurat_clusters"), assay="RNA", slot = "data")
p.cells.foxp3 <- p.cells.foxp3%>% mutate(FOXP3.e = FOXP3 > 0)%>% group_by(seurat_clusters, FOXP3.e)%>% summarise(n = n())%>% ungroup()
ggplot(p.cells.foxp3, aes(x=seurat_clusters, y=n, fill = FOXP3.e)) + geom_bar(stat = "identity")
`summarise()` has grouped output by 'seurat_clusters'. You can override using
the `.groups` argument.

1.2.3. cell qualities per cluster¶

In [60]:
VlnPlot(treg.amp, features = "nFeature_RNA")
VlnPlot(treg.amp, features = "nCount_RNA")
VlnPlot(treg.amp, features = "percent_mito")

1.2.4. cluster markers¶

In [69]:
# treg.markers <- FindAllMarkers(treg.amp, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, verbose = F)
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "seurat_clusters")
treg.markers %>% write.csv("../../analysis/AMP2_RA_tissue/AMP.Treg.all.clustering.markers.csv")

# treg.markers %>%
#     group_by(cluster) %>%
#     slice_max(n = 2, order_by = avg_log2FC)
In [72]:
m.show <- treg.markers %>%
    group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 30, order_by = logFC)%>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature)
m.show %>% write.csv("../../analysis/AMP2_RA_tissue/AMP.Treg.clustering.markers.csv")
m.show[1:25,]
A tibble: 25 × 14
row0110111223456789
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1RPL34 JUNB ISG15 CCL5 FN1 CCL5 TNFRSF18 STMN1 KLF2 ZNF331 IL32 CXCL13 MALAT1
2RPL32 FOS MX1 CD74 PRG4 NKG7 TNFRSF4 TUBA1B JUNB JUNB S100A4 HLA-DRA NEAT1
3RPL13 JUN IFIT2 MS4A1 CST3 GZMA CTLA4 HMGB2 DUSP1 MAGEH1 LTB HLA-DRB1TTN
4RPS18 KLF2 XAF1 NKG7 PLA2G2AGZMK IL32 TUBB TSC22D3 AREG C15orf53CD74 MIAT
5RPL39 FOSB IFIT3 HLA-DRA CLU CCL4 S100A4 MKI67 JUN ICA1 FOXP3 GAPDH PCSK7
6RPS12 NR4A2 IFI6 GZMA FTL GZMH CTSC CENPF RGS1 RGS2 GBP5 TNFRSF18MTRNR2L12
7RPL30 BTG2 IFIT1 HLA-DPB1CRTAC1 CST7 BATF TOP2A CREM SESN3 MALAT1 TNFRSF4 NKTR
8RPS6 ZNF331 PMAIP1 HLA-DQA1CFD CD8A LMNA ASPM DNAJB1 FOS S100A6 RBPJ SYNE2
9RPLP2 CD69 OASL CCL4 HTRA1 HLA-DRB1PHLDA1 HMGN2 FTH1 SRGN PBXIP1 MAF DDX17
10RPL9 YPEL5 LY6E ID2 COL1A2 CD74 TNFRSF1B GZMA BTG2 STAT3 TNFRSF18KLRB1 XIST
11RPL22 DUSP1 ISG20 HLA-DPA1LYZ HLA-DRA LTB HMGB1 KLF6 MAL IL2RA FKBP5 N4BP2L2
12RPL36 BTG1 MX2 BANK1 COL3A1 HLA-DPB1TIGIT CCL5 JUND CREM SYNE2 LGALS1 MT-ND3
13RPS14 AREG HERC5 IGKC TIMP3 ANXA1 TYMP H2AFZ TNFAIP3 RNF19A EMP3 COTL1 POLR2J3.1
14RPS3A CXCR4 OAS1 GZMK ANXA1 APOBEC3GLINC01943NKG7 AC058791.1TSHZ2 SAMHD1 HLA-DPA1OGA
15RPL35AZFP36L2STAT1 HOPX TIMP1 GZMM SPOCK2 SMC4 LTB YPEL5 CD52 CD7 MT-ND2
16RPS13 EEF1B2 EPSTI1 FOS HLA-DRAHLA-DPA1ENO1 TYMS FOSB BTG1 TRAC S100A11 MT-ND5
17RPL37 SOCS3 SAT1 HLA-DRB1MGP COTL1 MAF NUSAP1 LMNA GEM UGP2 HLA-DPB1GOLGA8A
18RPL38 RPS12 RGS1 RALGPS2 DCN CTSW DUSP4 HIST1H4CCD69 RPL9 N4BP2L2 NR3C1 GOLGA8B
19RPS23 RPS8 EIF2AK2HLA-DQB1LUM CD8B GAPDH DUT NR4A2 NMB HLA-A SPOCK2 MT-CYB
20RPS29 RPS6 TYMP MEF2C APOE HCST SRGN DEK SAT1 TIGIT TMSB10 ITM2A NABP1
21RPL11 RPL32 SMCHD1 JUN C1QA KLRG1 FOXP3 PCLAF YPEL5 ZFP36L2B2M PKM PNISR
22RPL19 RPS3A ZC3HAV1PLCG2 MMP3 CCL4L2 PKM HLA-DRB1TENT5C ITM2A NA ARPC1B MT-ND4
23RPS28 JUND TRIM22 ANXA1 VIM IFNG IL2RA GAPDH BTG1 RPS4X NA S100A4 MT-ATP6
24RPL12 RPL9 IFI44L CTSW RNASE1 GZMB CARD16 COTL1 CD52 JUND NA SRGN MT-ND1
25RPS21 RPS13 FOXP3 IL7R EMP3 HLA-DQA1LGALS1 H2AFV DNAJA1 SOCS3 NA HLA-DQB1MT-CO3

OLD: 0 - Some Treg population - with TNFRSF18 1 - AREG, low FOXP3 2 - "naive" phenotype (high expression of ribosomal proteins) 3 - MHC class II (CD74, HLA-DPB1, HLA-DRB1), IFNg signaling (FLNA, HLA-DPB1, HLA-DRB1, SAMHD1, GBP5), NFKB cascade (CD74, FLNA, HLA-DRB1, S100A4, LGALS1) 4 - CXCR4, AREG 5 - high mitochondrial genes, lnc/ low quality cells - REMOVE 6 - CD8 GZMK? -REMOVE 7 - No FOXP3, yes Fibroblasts genes - REMOVE 8 - HSP, stimuli response? 9 - IL7R 10 - proliferating 11 - IFN signature

NEW: 2 - CD8 GZMK - REMOVE 8 - low on FOXP3 (originally from proliferating), high on CXCL13, do not have MKI 67 expression - REMOVE 9- mt high, low quality - REMOVE 11 - non CD4 non prolif - REMOVE 12 - fibroblasts - REMOVE

1.2.5. clusters correlation with AMP¶

In [64]:
#Psudobulk and correlation
amp.clusters <- FetchData(object = amp, 
                          vars = c(treg.amp@assays$RNA@var.features, "cluster"), 
                          slot = "data", assay = "RNA")
amp.clusters <- amp.clusters %>% group_by(cluster) %>% summarise_all("mean")

new.clusters <- FetchData(object = treg.amp, 
                          vars = c(treg.amp@assays$RNA@var.features, "seurat_clusters"), 
                          slot = "data", assay = "RNA")
new.clusters <- new.clusters %>% group_by(seurat_clusters) %>% summarise_all("mean")

amp.clusters <- data.frame(amp.clusters)
row.names(amp.clusters) <- amp.clusters$cluster
amp.clusters$cluster <- NULL
new.clusters <- data.frame(new.clusters)
row.names(new.clusters) <- new.clusters$seurat_clusters
new.clusters$seurat_clusters <- NULL
Warning message in asMethod(object):
“sparse->dense coercion: allocating vector of size 1.4 GiB”
In [65]:
dim(new.clusters)
  1. 13
  2. 2000
In [66]:
cluster.cor <- cor(t(scale(amp.clusters)), t(scale(new.clusters)), use = "complete.obs")
cluster.cor <- data.frame(cluster.cor)
In [67]:
# library(pheatmap)
colorBreaks_cor = seq(-0.8,0.8,length=1000)
palette_cor <- colorRampPalette(c("blue", "white", "red"))(n = length(colorBreaks_cor))

pheatmap(cluster.cor, scale = "none", 
         cluster_cols = F)
pheatmap(cluster.cor, scale = "none", breaks = colorBreaks_cor, color = palette_cor, 
         cluster_cols = T)

1.2.6. Save AMP original Tregs¶

In [113]:
saveRDS(treg.amp, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_orig.rds")
# treg.amp <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_orig.rds")

1.2.7. subset to remove low quality or non Treg clusters¶

1.2.7.1. Redo normalization, scale, FeatSelect¶

In [114]:
Idents(treg.amp) <- "seurat_clusters"

#RNA
treg.amp <- treg.amp %>% 
            subset(idents = c(2,8,9,11,12), invert= T) %>% 
            NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
            ScaleData()%>% 
            FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
            RunPCA(verbose = F)

#protein
treg.amp <- NormalizeData(treg.amp, normalization.method = "CLR", margin = 2, assay = "ADT") %>% 
            ScaleData(assay = "ADT")

treg.amp
Centering and scaling data matrix

Normalizing across cells

Centering and scaling data matrix

An object of class Seurat 
33596 features across 5863 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 4 dimensional reductions calculated: pca, umap, harmony, humap
In [ ]:
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = T)
fig.size(5,5)

ElbowPlot(treg.amp, ndims = 50, reduction = "harmony") 
ElbowPlot(treg.amp, ndims = 50, reduction = "pca")
In [117]:
# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3,
                 spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
    embeddings = HU$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph

treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
                                  resolution = 1, verbose = TRUE)


# dims_use = 1:20
# treg.amp <- treg.amp %>% 
#             FindNeighbors(reduction="harmony", dims=dims_use, verbose=FALSE) %>% 
#             FindClusters(resolution=0.5, verbose=FALSE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 5863
Number of edges: 70188

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7157
Number of communities: 14
Elapsed time: 0 seconds
In [118]:
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(5, 6)
red.use = "humap"
DimPlot(treg.amp, reduction = red.use, label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
In [ ]:
fig.size(8,16)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "MKI67", "CD4", "CD8A"), ncol=3, order = T, pt.size = 0.5)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "MKI67", "CD4", "CD8A"), ncol=3, order = F, pt.size = 0.5)
In [120]:
Idents(treg.amp) <- "seurat_clusters"
treg.amp <- FindSubCluster(treg.amp, cluster = 3, resolution = 0.3, graph.name = "humap_fgraph")


fig.size(8,8)
DimPlot(treg.amp, reduction = red.use, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20)) +
    scale_color_tableau("Tableau 20")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 628
Number of edges: 6528

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8060
Number of communities: 4
Elapsed time: 0 seconds
In [121]:
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "sub.cluster")
In [122]:
m.show <- treg.markers %>%
    group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature)
m.show[1:30,]
A tibble: 30 × 18
row011011121323_03_13_23_3456789
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1MALAT1 JUNB ZNF331 ISG15 TNFRSF18TNFRSF4 TNFRSF18 STMN1 CENPF STMN1 TUBA1B IL32 ZNF331 KLF2 TNFRSF4 LMNA CCL5
2RPL13 FOS JUNB MX1 TNFRSF4 TNFRSF18S100A4 CCL5 ASPM TUBA1B STMN1 S100A4 JUNB JUNB TNFRSF18S100A4 NKG7
3MT-ND2 JUN ANXA1 XAF1 TNFRSF1BPKM IL32 TUBA1B HMGB2 DUT TUBB LTB FOS DUSP1 CTLA4 KLF2 GZMA
4MT-ND1 KLF2 FOS IFI6 CTLA4 ENO1 TNFRSF4 HMGB2 MKI67 PCNA HMGB2 MALAT1 RGS2 JUN SRGN S100A10 GZMH
5RPL32 NR4A2 AREG IFIT3 CTSC HSP90AB1LGALS1 TUBB CCL5 GAPDH TOP2A CD52 AREG TSC22D3 MAF ANXA2 STMN1
6MT-CYB CD69 CD69 LY6E CD7 EIF5A CTSC NKG7 STMN1 CD74 HIST1H4CGBP5 SESN3 DNAJB1 DUSP4 S100A6 GZMK
7RPLP2 BTG2 CREM ISG20 SPOCK2 BATF CTLA4 GZMA GZMA TYMS MKI67 TXNIP BTG1 TNFAIP3 CD7 MYADM CCL4
8RPL34 FOSB FOSB OASL BATF PGAM1 LMNA HIST1H4CTOP2A TUBB H2AFZ SYNE2 ZFP36L2BTG2 ITM2A EMP3 CD8A
9MT-ND3 ZNF331 NR4A2 IFIT2 IL32 MIR155HGBATF MKI67 TUBA1B HLA-DRA HMGB1 EVL CXCR4 RGS1 TIGIT CD52 DUT
10MT-ND4 YPEL5 TNFAIP3 MX2 PTPN7 PARK7 LGALS3 TOP2A NKG7 HMGB2 CENPF N4BP2L2ITM2A FTH1 SPOCK2 LGALS3 HLA-DRB1
11RPL39 ZFP36L2YPEL5 IFIT1 LAIR2 CALR TYMP ASPM PTTG1 COTL1 TYMS NKTR YPEL5 KLF6 KLRB1 LGALS1 GZMB
12RPS18 EEF1B2 DUSP1 STAT1 RNF213 PSME2 GLRX CENPF TUBB PCLAF DUT PNISR SRGN FOSB RNF19A VIM TUBA1B
13RPS6 AREG DUSP2 OAS1 ENTPD1 TNFRSF9 LINC01943NUSAP1 SMC4 PKM HMGN2 S100A6 MAL CREM CD2 ITGB1 COTL1
14RPL38 DUSP1 ZFP36L2 EPSTI1 NEAT1 GAPDH IL2RA HMGB1 CENPE H2AFZ GAPDH ACAP1 JUND JUND UCP2 S100A11 PCNA
15RPS14 RPS12 SRGN EIF2AK2LTB SRM HLA-DRB1 HMGN2 UBE2C HLA-DRB1MT2A CYBA MAGEH1 MYADM ARID5B TSC22D3 HLA-DRA
16RPL9 CXCR4 SLC2A3 HERC5 MT-ND5 CTLA4 CD74 CCL4 ARL6IP1HMGB1 HLA-DRB1DDX17 STAT3 AC058791.1NMB IL32 CD74
17MT-CO3 DNAJA1 DNAJA1 RGS1 IL2RA DNPH1 S100A6 SMC4 HMGN2 TPI1 NUSAP1 PTPRC RPL9 FOS CREM TAGLN2 APOBEC3G
18RPL37 SOCS3 JUN TYMP MAF LDHA PHLDA1 GZMK TPX2 DEK HLA-DRA HLA-A RPS27 NR4A2 LAG3 CRIP2 PCLAF
19RPS12 BTG1 CXCR4 PMAIP1 TYMP RANBP1 GAPDH HIST1H1CUBE2S ENO1 PCLAF TMSB10 RNF19A LMNA ICA1 CRIP1 TYMS
20RPL35A RPS3A AC020916.1IFI44L LAYN NME1 ENO1 HIST1H1DNUSAP1 LGALS1 ASPM B2M RPS13 TENT5C CD27 ATP2B1 HMGB2
21RPL30 RPS6 SOCS1 SP100 PKM SEC61B TIGIT TYMS CCNB1 PFN1 ENO1 NA RPL30 HSP90AA1 LAIR2 SAMHD1 HLA-DPA1
22RPL36 RPS13 LMNA SAMD9L LGALS1 PRDX1 TNFRSF1B H2AFZ DLGAP5 SLC25A5 CD74 NA RPS3A CD69 MAGEH1 ISG20 DEK
23RPL12 RPS8 IL7R TRIM22 CD2 NDUFS6 CARD16 TMPO KIF20B RANBP1 UBE2C NA TSHZ2 DNAJA1 CD4 ARL4C CST7
24RPS20 PABPC1 RGCC SAT1 CD74 EIF5B FOXP3 HIST1H1BHMGB1 ACTB SLC25A5 NA ICA1 IER2 C9orf16 SH3BGRL3CTSW
25MT-ATP6RPL9 NR3C1 ZC3HAV1LAG3 NINJ1 MYL6 UBE2C JPT1 MCM7 PTTG1 NA RPS4X ZFP36 LIMS1 DUSP4 ANXA1
26RPS23 RPS5 ALOX5AP SMCHD1 IL2RG PPA1 CYTOR H2AFV KPNA2 HLA-DPA1SMC4 NA RPL21 IRF1 IL32 CLIC1 HELLS
27RPS3A RPL32 GPR183 RNF213 IL2RB SET CLIC1 DEK TUBA1C HMGN2 LGALS1 NA RPS14 YPEL5 DUSP2 EZR MT2A
28RPL27A RPL3 BTG2 SP110 CD4 PSMA7 DUSP4 KNL1 LGALS1 PPIA TPI1 NA EEF1A1 BTG1 PHLDA1 TXN LCP1
29RPL11 RPL30 JUND PARP14 CD27 TNFRSF1BS100A10 CD8A BIRC5 TK1 RAN NA RPS27A PPP1R15A NDUFV2 CORO1A GSTP1
30MT-CO2 NFKBIA BTG1 IRF1 TNFRSF9 REL CXCR6 H2AFX H2AFZ RAN RRM2 NA RPL3 CD55 PKM PBXIP1 IDH2
In [136]:
fig.size(5, 20)
VlnPlot(treg.amp, features = c("FOXP3", "CD4", "CD8A", "MKI67"), group.by = "sub.cluster")

1.2.7.2. Remove CD8 clusters - 3_0, 3_1, 9 (from prolif)¶

In [129]:
#RNA
Idents(treg.amp) <- "sub.cluster"
treg.amp <- treg.amp %>% 
            subset(idents = c("3_0", "3_1", "9"), invert= T) %>% 
            NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
            ScaleData()%>% 
            FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
            RunPCA(verbose = F)

#protein
treg.amp <- NormalizeData(treg.amp, normalization.method = "CLR", margin = 2, assay = "ADT") %>% 
            ScaleData(assay = "ADT")

treg.amp
Centering and scaling data matrix

Normalizing across cells

Centering and scaling data matrix

An object of class Seurat 
33596 features across 5368 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 4 dimensional reductions calculated: pca, umap, harmony, humap
In [130]:
fig.size(4,4)
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = T)
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony 5/10

Harmony 6/10

Harmony 7/10

Harmony 8/10

Harmony 9/10

Harmony 10/10

Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
In [133]:
# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3,
                 spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
    embeddings = HU$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph

treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
                                  resolution = 0.5, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 5368
Number of edges: 64887

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7972
Number of communities: 12
Elapsed time: 0 seconds
In [134]:
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(5, 6)
red.use = "humap"
DimPlot(treg.amp, reduction = red.use, label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
In [135]:
fig.size(8,16)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "MKI67", "CD4", "CD8A"),
            ncol=3, order = F)
In [ ]:

In [137]:
fig.size(5, 10)
FeaturePlot(treg.amp, reduction = "humap", features = c("IL7R", "CD127/IL-7R-alpha-prot"), order = F)
VlnPlot(treg.amp, features = c("IL7R", "CD127/IL-7R-alpha-prot"))
VlnPlot(treg.amp, features = c("FOXP3", "IL2RA"))
Warning message:
“Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
In [138]:
table(treg.amp$cluster)
     T-18: Proliferating T-8: CD4+ CD25-high Treg  T-9: CD4+ CD25-low Treg 
                     344                     2510                     2514 
In [168]:
treg.amp
table(treg.amp$seurat_clusters)
An object of class Seurat 
33596 features across 5368 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 4 dimensional reductions calculated: pca, umap, harmony, humap
   0    1    2    3    4    5    6    7    8    9   10   11 
1631 1054  858  467  424  264  253  139  130   54   48   46 
In [169]:
5368 - 264
5104
In [139]:
DotPlot(treg.amp, features = c("FOXP3", "IL2RA", "IL7R", "MKI67"), scale = F)
In [140]:
#Psudobulk 
pb <- FetchData(object = treg.amp, 
                          vars = c("FOXP3", "IL2RA", "AREG", "IL7R", "CD127/IL-7R-alpha-prot", "seurat_clusters"), 
                          slot = "data", assay = "RNA")
pb <- pb %>% group_by(seurat_clusters) %>% summarise_all("mean") %>% rename("CD127.prot" = "adt_CD127/IL-7R-alpha-prot")
# pb%>% head
Warning message:
“Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
In [141]:
pb%>% ggplot(aes(x = FOXP3, y = CD127.prot, label = seurat_clusters, color = AREG)) + geom_point() +  geom_smooth(method = "lm", se = FALSE) + geom_label()
`geom_smooth()` using formula = 'y ~ x'
Warning message:
“The following aesthetics were dropped during statistical transformation: label,
colour
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?”
In [164]:
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "seurat_clusters")

m.show <- treg.markers %>%
    group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature)
m.show[1:30,]
A tibble: 30 × 13
row01101123456789
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1RPL13 TNFRSF18 AC004585.1LMNA JUNB JUN IL32 TUBA1B ITM2A TNFRSF18 CCL5 ISG15
2RPLP2 LGALS1 TSHZ2 S100A10ZNF331 KLF2 GBP5 STMN1 DUSP4 IL32 GZMK MX1
3RPL32 IL32 ITM2A VIM FOS DUSP1 SYNE2 TUBB SRGN NEAT1 ANXA1 XAF1
4RPS12 S100A4 TCF7 MYADM NR4A2 BTG2 SLFN5 HMGB2 TNFRSF4 SPOCK2 GZMA IFI6
5RPS18 CTSC RBPJ CRIP2 YPEL5 JUNB NKTR DUT KLRB1 MAF GIMAP7 LY6E
6RPS6 TNFRSF4 PGM2L1 CXCR4 AREG DNAJB1 FLNA HMGB1 NMB TNFRSF4 CST7 ISG20
7RPL34 HLA-DRB1 CXCL13 TAGLN2 JUN RGS1 TTN H2AFZ RNF19A TNFRSF1B NKG7 EIF2AK2
8RPS20 CD74 FKBP5 CRIP1 FOSB FOS EVL GAPDH CTLA4 CYTOR GZMM EPSTI1
9RPL39 GAPDH MAF KLF6 CD69 CD69 PCSK7 TYMS CD7 CD7 GIMAP4 STAT1
10RPS14 ENO1 RNF19A CD69 DNAJA1 FOSB MTRNR2L12MKI67 ICA1 BATF COTL1 MX2
11RPL30 CTLA4 PVALB AHNAK ZFP36L2TSC22D3 MALAT1 CD74 MAF CTLA4 ALOX5AP OAS1
12RPS3A LGALS3 NMB S100A11DUSP2 JUND SAMHD1 HLA-DRA NR3C1 DUSP4 HCST HERC5
13RPL9 MYL6 STAT3 TSC22D3BTG2 KLF6 CD74 HLA-DRB1C9orf16 FOXP3 CD99 IFIT1
14RPL38 LMNA ID2 ATP2B1 DUSP1 HSP90AA1 S100A4 PCNA TNFRSF18LINC01943 APOBEC3GSP100
15RPL35ACLIC1 NR3C1 EZR SOCS3 CREM N4BP2L2 HMGN2 UCP2 CD2 KLRG1 TYMP
16RPS13 CYTOR FYB1 RGCC CXCR4 NR4A2 RNF213 PCLAF MAGEH1 CD74 AHNAK IFIT3
17RPL3 S100A6 SESN3 LRRFIP1UBE2S DNAJA1 HLA-DPB1 COTL1 TIGIT RNF213 MYL12A SAMD9L
18RPS8 TYMP IL6ST ANXA2 EEF1B2 MYADM PNISR ENO1 CD2 MIR4435-2HGLYAR TRIM22
19RPL37 S100A10 PPP2R5C IDS BTG1 GADD45B IL10RA LGALS1 DUSP2 CTSC CD8A IFI44L
20RPS23 TNFRSF1B DRAIC EMP3 PABPC1 TNFAIP3 DDX17 TPI1 TSHZ2 TIGIT CD81 RNF213
21MT-ND1ARPC1B ZFP36L1 CD44 CYCS AC058791.1CD52 MT2A CD27 PHLDA1 VIM OASL
22RPL36 BATF TRIM8 JUND JUND IER2 MPHOSPH8 SLC25A5 NDUFV2 C4orf48 ANK3 BST2
23EEF1B2GLRX CD84 RPS5 RPS12 IRF1 HLA-A PKM OAZ1 ENTPD1 ITGB2 SAMD9
24RPL27APKM PPP1CC S100A6 RPL21 FTH1 ARGLU1 TOP2A PEBP1 LAYN CAPZB IFI16
25RPS25 ACTB SLC9A9 PGM2L1 RGCC LMNA CYBA DEK GAPDH GBP5 GPR171 IRF7
26MALAT1LINC01943CD28 RPL10A RPS3A YPEL5 S100A6 RANBP1 ALOX5AP MT-ND5 IFITM2 SAT1
27RPL12 UCP2 ARMH1 PCSK1N SLC2A3 SAT1 PTPRC CENPF ID2 IL2RB ANXA6 SMCHD1
28RPL11 DUSP4 CEMIP2 NUDC RPS13 ZFP36 TMSB10 HLA-DPA1HMGN1 SAT1 STK17A SPATS2L
29RPL22 IL2RA ST8SIA1 DUSP10 RPS5 HSPA8 NA RAN RBPJ CD4 CIB1 FOXP3
30MT-ND2HLA-A HIF1A SSBP4 CHMP1B SELENOK NA ACTB COTL1 TYMP NR3C1 SP110
In [166]:
fig.size(5,7)
sum.donor <- treg.amp@meta.data %>% filter(treatment != "OA") %>% select(donor, CTAP, treatment, seurat_clusters) 
pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 2)
# colSums(pt)
pheatmap(pt, scale = "none", main = "prop of cluster")
pt %>%  data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>% 
    ggplot(aes(x = CTAP, y = Freq, fill = Cluster)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")


pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 1)
# pt
pheatmap(pt, scale = "none", main = "prop of CTAP")
pt %>%  data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>% 
    ggplot(aes(x = Cluster, y = Freq, fill = CTAP)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")

1.2.8. Save AMP QC'd Tregs¶

In [162]:
saveRDS(treg.amp, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_filtered_QC.rds")

# treg.amp <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_filtered_QC.rds")
In [171]:
fig.size(10, 10)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "CXCR6"), ncol=2, order = F, pt.size = 0.0001)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "CXCR6"), ncol=2, order = T, pt.size = 0.0001)

1.2.9. Subset to remove FOXP3-low CD127-high cells¶

1.2.9.1. Redo normalization¶

In [144]:
# removing clusters 8,2,10 ( high CD127 low FOXP3) 

#RNA
# Idents(treg.amp) <- "sub.cluster"
treg.amp <- treg.amp %>% 
        subset(idents = c("2", "8", "10"), invert= T) %>% 
        NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
        ScaleData()%>% 
        FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
        RunPCA(verbose = F)

#protein
treg.amp <- treg.amp %>% 
        NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") %>% 
        ScaleData(assay = "ADT")

treg.amp
Centering and scaling data matrix

Normalizing across cells

Centering and scaling data matrix

An object of class Seurat 
33596 features across 4332 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 4 dimensional reductions calculated: pca, umap, harmony, humap
In [170]:
4332- 264
4068
In [145]:
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = T)

fig.size(5,5)

ElbowPlot(treg.amp, ndims = 50, reduction = "harmony")
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony 5/10

Harmony 6/10

Harmony 7/10

Harmony 8/10

Harmony 9/10

Harmony 10/10

Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
In [146]:
# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3,
                 spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
    embeddings = HU$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph

treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
                                  resolution = 0.5, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 4332
Number of edges: 51609

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8115
Number of communities: 10
Elapsed time: 0 seconds
In [147]:
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(4, 5)
DimPlot(treg.amp, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
In [148]:
fig.size(4,15)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "NR3C1"), ncol=4, order = T, pt.size =0.0001)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "NR3C1"), ncol=4, order = F, pt.size = 0.0001)
In [149]:
VlnPlot(treg.amp, features = c("FOXP3", "AREG"))
In [150]:
#number of cells per cluster
p.cells.foxp3 <- FetchData(treg.amp, vars= c("FOXP3", "seurat_clusters"), assay="RNA", slot = "data")
p.cells.foxp3 <- p.cells.foxp3%>% mutate(FOXP3.e = FOXP3 > 0)%>% group_by(seurat_clusters, FOXP3.e)%>% summarise(n = n())%>% ungroup()
ggplot(p.cells.foxp3, aes(x=seurat_clusters, y=n, fill = FOXP3.e)) + geom_bar(stat = "identity")
`summarise()` has grouped output by 'seurat_clusters'. You can override using
the `.groups` argument.

1.2.9.2. cluster markers¶

In [151]:
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "seurat_clusters")

m.show <- treg.markers %>%
    group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature)
m.show[1:30,]
A tibble: 30 × 11
row0123456789
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1RPL13 TNFRSF18 JUN IL32 TUBA1B TNFRSF18 NMB LMNA ISG15 GZMK
2RPL32 TNFRSF4 KLF2 SYNE2 STMN1 CD7 ICA1 S100A10MX1 GZMA
3RPLP2 IL32 DUSP1 RNF213 TUBB MAF ITM2A CRIP2 XAF1 CCL5
4RPS12 CTSC JUNB N4BP2L2 HMGB2 TNFRSF4 ZNF331 VIM IFI6 CST7
5KLF2 S100A4 FOS GBP5 DUT CD2 NR3C1 ANXA1 LY6E GIMAP7
6RPS18 LGALS1 BTG2 MALAT1 H2AFZ CTLA4 SRGN TAGLN2 IFIT3 NUCB2
7RPS6 CTLA4 FOSB SLFN5 HMGB1 SPOCK2 C9orf16 CRIP1 OASL CD81
8RPS20 GAPDH DNAJB1 NKTR TYMS NEAT1 NDUFV2 MYADM MX2 ANXA1
9RPS3A ENO1 RGS1 TXNIP GAPDH TNFRSF1B RNF19A AHNAK ISG20 LYAR
10RPL34 MYL6 CD69 EVL MKI67 DUSP4 GPX4 S100A11STAT1 TUT4
11FOS SRGN NR4A2 MTRNR2L12HLA-DRA IL32 TIGIT ATP2B1 IFIT1 RPL23A
12ZFP36L2LMNA KLF6 SAMHD1 PCNA CYTOR DUSP4 LRRFIP1OAS1 ATM
13RPL9 TYMP TSC22D3 PNISR HMGN2 SRGN TNFRSF4 ANXA2 EPSTI1 RPS12
14RPL30 CLIC1 CREM MPHOSPH8 CD74 SYNE2 RGS2 EMP3 SP100 HCST
15RPS14 BATF JUND DDX17 PCLAF FOXP3 CD7 RGCC HERC5 UTRN
16RPL39 HLA-DRB1 DNAJA1 ARGLU1 COTL1 PBXIP1 PEBP1 KLF6 TYMP CTSW
17RPL38 CD74 GADD45B CD52 HLA-DRB1RNF213 MAGEH1 PBXIP1 EIF2AK2CCR7
18RPS8 TNFRSF1B MYADM PTPRC SLC25A5 EML4 HINT1 EZR ZC3HAV1NKG7
19RPS13 DUSP4 HSP90AA1 NA TPI1 PHLDA1 SESN3 IDS SMCHD1 RPS27
20EEF1B2 ARPC1B IER2 NA ENO1 XIST AC004585.1S100A6 SAMD9L RPL21
21TCF7 PHLDA1 FTH1 NA TOP2A PRDM1 CD27 TSC22D3IFI44L GIMAP1
22RPL3 CD2 AC058791.1NA DEK IL2RB FABP5 CD44 TRIM22 RPS3
23RPL35A PKM TNFAIP3 NA MT2A N4BP2L2 RPL15 CXCR4 SAT1 AC004585.1
24RPS23 CYTOR HSPA8 NA PKM THADA HMGN1 CD99 BST2 TCF7
25RPL37 LINC01943LMNA NA RANBP1 CD4 RNASET2 YWHAQ IFIT2 RPS14
26RPL27A TIGIT PPP1R15A NA CENPF MIR4435-2HGSLC25A3 PLP2 IRF7 ANK3
27RPL11 ACTB YPEL5 NA RAN CST7 SEC11C SRGN SAMD9 RPS27A
28RPS25 LGALS3 IRF1 NA HIST1H4CRBPJ LBH UCP2 IFI16 RPL30
29RPL36 MAF SAT1 NA LGALS1 C4orf48 SEC11A RABAC1 SPATS2LRPL13A
30CCR7 HLA-A ZFP36 NA LDHA TYMP OAZ1 CLIC1 RNF213 RPL35A
In [161]:
fig.size(4,15)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "GZMK", "CD4", "CD8A"), ncol=4, order = F, pt.size = 0.0001)
In [152]:
fig.size(5,7)
sum.donor <- treg.amp@meta.data %>% filter(treatment != "OA") %>% select(donor, CTAP, treatment, seurat_clusters) %>% distinct()
pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 2)
# colSums(pt)
pheatmap(pt, scale = "none", main = "prop of cluster")
pt %>%  data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>% 
    ggplot(aes(x = CTAP, y = Freq, fill = Cluster)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")


pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 1)
# pt
pheatmap(pt, scale = "none", main = "prop of CTAP")
pt %>%  data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>% 
    ggplot(aes(x = Cluster, y = Freq, fill = CTAP)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")
In [153]:
table(treg.amp$cluster_name)
     T-18: Proliferating T-8: CD4+ CD25-high Treg  T-9: CD4+ CD25-low Treg 
                     302                     2262                     1768 

1.2.10. Save Refined Tregs¶

In [110]:
saveRDS(treg.amp, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_filtered_refined.rds")

# treg.amp <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_filtered_refined.rds")

2. Strategy 2 - Alternative identification of Tregs by independent clustering¶

2.1. Look at correlations within AMP annotations¶

In [106]:
#Psudobulk and correlation
amp.clusters <- FetchData(object = amp, 
                          vars = c(amp@assays$RNA@var.features, "cluster"), 
                          slot = "data", assay = "RNA")
amp.clusters <- amp.clusters %>% group_by(cluster) %>% summarise_all("mean")
amp.clusters <- data.frame(amp.clusters)
row.names(amp.clusters) <- amp.clusters$cluster
amp.clusters$cluster <- NULL

cluster.cor.amp <- cor(t(scale(amp.clusters)), t(scale(amp.clusters)))
Warning message in asMethod(object):
“sparse->dense coercion: allocating vector of size 1.4 GiB”
In [107]:
cluster.cor.amp <- data.frame(cluster.cor.amp)
In [108]:
cluster.cor.amp[c("T-8: CD4+ CD25-high Treg", "T-9: CD4+ CD25-low Treg"),1:5]
A data.frame: 2 × 5
T.0..CD4..IL7R..memoryT.10..CD4..OX40.NR3C1.T.11..CD4..CD146..memoryT.12..CD4..GNLY.T.13..CD8..GZMK.B..memory
<dbl><dbl><dbl><dbl><dbl>
T-8: CD4+ CD25-high Treg-0.094812170.20524840.10680617-0.2199580-0.08729903
T-9: CD4+ CD25-low Treg 0.141428700.39012590.01403401-0.2340225-0.35842914
In [109]:
fig.size(8,8)
# library(pheatmap)
colorBreaks_cor = seq(-1,1,length=1000)
palette_cor <- colorRampPalette(c("#D4AC0D", "white", "#800080"))(n = length(colorBreaks_cor))

pheatmap(cluster.cor.amp, scale = "none", 
         cluster_cols = T)
pheatmap(cluster.cor.amp, scale = "none", breaks = colorBreaks_cor, color = palette_cor, 
         cluster_cols = T)

2.2. Extract CD4 T cells and reclustering¶

In [110]:
fig.size(5, 20)
VlnPlot(amp, features = c("FOXP3","IL2RA"), ncol=2, group.by = "cluster")
VlnPlot(amp, features = c("CD4-prot","CD8a-prot"), ncol=2, pt.size = 0, group.by = "cluster")
VlnPlot(amp, features = c("CD127/IL-7R-alpha-prot"), ncol=2, pt.size = 0, group.by = "cluster")
Warning message:
“Could not find CD4-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD8a-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
In [111]:
unique(amp$cluster[grepl("CD4\\+", amp$cluster)])
  1. 'T-6: CD4+ memory'
  2. 'T-5: CD4+ GZMK+ memory'
  3. 'T-8: CD4+ CD25-high Treg'
  4. 'T-2: CD4+ IL7R+CCR5+ memory'
  5. 'T-12: CD4+ GNLY+'
  6. 'T-4: CD4+ naive'
  7. 'T-0: CD4+ IL7R+ memory'
  8. 'T-3: CD4+ Tfh/Tph'
  9. 'T-1: CD4+ CD161+ memory'
  10. 'T-10: CD4+ OX40+NR3C1+'
  11. 'T-9: CD4+ CD25-low Treg'
  12. 'T-11: CD4+ CD146+ memory'
  13. 'T-7: CD4+ Tph'
In [112]:
Idents(amp) <- "cluster"
alt.Treg <- subset(amp, idents = unique(amp$cluster[grepl("CD4\\+", amp$cluster)]))
In [113]:
alt.Treg
An object of class Seurat 
33596 features across 58105 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 2 dimensional reductions calculated: pca, umap
In [114]:
alt.Treg <- alt.Treg %>% 
            NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
            FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
            ScaleData() %>% 
            RunPCA(verbose = F)
Centering and scaling data matrix

In [115]:
alt.Treg <- NormalizeData(alt.Treg, normalization.method = "CLR", margin = 2, assay = "ADT") %>% 
            ScaleData(assay = "ADT")
Normalizing across cells

Centering and scaling data matrix

In [ ]:
fig.size(5,10)
DimPlot(alt.Treg, reduction = "pca")
DimPlot(alt.Treg, reduction = "pca", group.by = "donor",shuffle = T)
In [117]:
alt.Treg <- RunHarmony(alt.Treg, group.by.vars="donor", reduction = "pca", theta = 0, plot_convergence = 2,
                    ## prevent early stopping
                     epsilon.harmony = -Inf, epsilon.cluster = -Inf,
                     max.iter.harmony = 10, max.iter.cluster = 50,
                     kmeans_init_nstart=5,
                     kmeans_init_iter_max=100)

fig.size(5,5)

ElbowPlot(alt.Treg, ndims = 50, reduction = "harmony") 
ElbowPlot(alt.Treg, ndims = 50, reduction = "pca")
Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony 5/10

Harmony 6/10

Harmony 7/10

Harmony 8/10

Harmony 9/10

Harmony 10/10

Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
In [118]:
# dims_use = 1:20
# alt.Treg <- RunUMAP(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindNeighbors(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindClusters(object=alt.Treg, resolution=1, verbose=FALSE)


# Run UMAP
set.seed(1)
HU <- uwot::umap(alt.Treg@reductions$harmony@cell.embeddings, min_dist = 0.3,
                 spread = 0.8, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(alt.Treg)
alt.Treg[['humap']] <- Seurat::CreateDimReducObject(embeddings = HU$embedding,
                                                    assay = 'RNA', 
                                                    key = 'HUMAP_',
                                                    global = TRUE)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(alt.Treg)
alt.Treg[['humap_fgraph']] <- HU_graph

alt.Treg <- FindClusters(alt.Treg, graph.name = 'humap_fgraph',
                                  resolution = 1, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 58105
Number of edges: 699332

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7738
Number of communities: 17
Elapsed time: 6 seconds
In [119]:
fig.size(5,5)

# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
red.use = "humap"
DimPlot(alt.Treg, reduction = red.use, label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20))+
    scale_color_tableau("Tableau 20")
fig.size(5,10)
DimPlot(alt.Treg, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20))
In [120]:
VlnPlot(alt.Treg, features = c("FOXP3", "IL2RA"), ncol = 1)

2.2.1. cluster markers¶

In [121]:
# library(presto)
Idents(alt.Treg) <- "seurat_clusters"
dge_wilxocon <- wilcoxauc(alt.Treg, "seurat_clusters")
In [122]:
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)

dge_wilxocon %>%
    group_by(group) %>%
    slice_max(n = 25, order_by = logFC)%>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) #%>% write.csv("AMP.reclustering.markers.csv")
A tibble: 25 × 18
row011011121314151623456789
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1ANXA1 CXCL13 NKG7 RPS3A JUN NEAT1 ISG15 FN1 STMN1 CCL5 AREG IL7R IL7R S100A4 RTKN2 LAG3 IL32
2TNFAIP3 TSHZ2 GNLY RPS13 FOSB NKTR IFIT2 PRG4 TUBA1BGZMK JUNB GIMAP7 FOS ANXA1 TIGIT TNFRSF18TIGIT
3CD69 ITM2A GZMH RPS12 FOS DDX17 IFIT3 CLU HMGB2 NKG7 YPEL5 IKZF1 LTB LGALS1 IKZF2 SRGN CTLA4
4LMNA AC004585.1CCL5 EEF1B2IER2 IGKC OASL PLA2G2ATUBB GZMA CD69 TXNIP ANXA1 S100A10 SELL CTLA4 S100A4
5FOS NR3C1 CCL4 RPS5 BTG2 MALAT1 PMAIP1 HTRA1 GAPDH CCL4 KLF2 LTB JUN GZMA SESN3 CXCL13 FOXP3
6JUN MAF FGFBP2 RPS6 HSP90AA1 XIST IFIT1 COL3A1 H2AFZ CST7 ZNF331NKTR KLF2 S100A11 RGS1 GAPDH TNFRSF18
7ZFP36 RNF19A GZMA RPL32 DNAJB1 MIAT MX1 COL1A2 HMGN2 CD74 FOS MT-ND2 DUSP1 S100A6 MAL MAF RTKN2
8NFKBIA FKBP5 CST7 RPS8 PPP1R10 RNF213 IFI44L CRTAC1 MKI67 HLA-DPB1SOCS3 N4BP2L2 FOSB VIM C12orf57TNFRSF4 HLA-DRB1
9KLF6 TIGIT PRF1 SELL EGR1 PCSK7 IFI6 MGP DUT HLA-DRB1BTG2 MTRNR2L12 RPS12 CCL5 ZNF331 CD7 IKZF2
10JUNB PGM2L1 GZMB RPL9 SAT1 N4BP2L2 XAF1 CST3 HMGB1 HLA-DPA1JUN MT-ND1 RPL36AEMP3 JUNB PTMS CYTOR
11FOSB SRGN EFHD2 RPL5 GADD45B MTRNR2L12ZC3HAV1CFD CENPF GZMM NR4A2 LUC7L3 RPS10 CLIC1 LTB S100A4 RGS1
12CXCR4 CORO1B ZEB2 RPL22 HSPA1B POLR2J3.1HERC5 TXNIP MT2A GZMH EEF1B2PNISR TPT1 SH3BGRL3ARID5B LGALS1 LGALS1
13RGCC CD7 PLEK RPL30 PPP1R15A MACF1 LY6E TIMP3 DEK APOBEC3GPABPC1MALAT1 RPLP0 CD52 RPL22 SPOCK2 TNFRSF4
14DUSP1 PPP1CC HOPX RPS23 MTRNR2L12PNISR CD69 LUM TYMS HCST LDHB MT-ND3 RPL34 IL32 RPS8 PDCD1 CD74
15IL7R THADA ID2 RPL21 DDX3X SYNE2 FOS DCN PCNA CCL4L2 RPS13 GIMAP4 KLRB1 CKLF BIRC3 S100A11 DUSP4
16MYADM TNFRSF4 KLRG1 KLF2 MALAT1 ARGLU1 GADD45BTIMP1 TOP2A CMC1 RPS3A DDX17 EEF1B2ANXA2 IGKC RBPJ IL2RA
17AC020916.1LIMS1 KLF2 RPL34 HEXIM1 AKNA MX2 SPARCL1PKM DUSP2 RPS8 PCSK7 S100A4CD99 RPS6 MYL6 TNFRSF1B
18DNAJA1 NMB RAP1B CCR7 NEAT1 ANKRD44 IFI44 FTL ENO1 HLA-DRA RPS12 PRMT2 RPSA HCST RPL32 ALOX5AP CD27
19TSC22D3 COTL1 TRGC2 RPL13 SRSF7 ASH1L ISG20 GSN TPI1 KLRG1 DNAJA1ATM RPL23AACTB RPL12 DUSP4 GBP5
20BTG2 ZNF331 C12orf75RPL37 PLCG2 OGA SAT1 APOE ACTB CD81 CD55 MT-ND5 RPS18 TAGLN2 CD27 CD4 CTSC
21VIM PDCD1 CTSW RPL39 INTS6 MAF EIF2AK2CD63 SMC4 CTSW RPS6 MT-CYB RPL3 PLP2 NABP1 LAIR2 CARD16
22FTH1 DRAIC IFNG RPL35AIER5 CFLAR STAT1 THBS4 PFN1 LYAR CYCS AC004687.1RPS21 MYL6 RPS13 PRDM1 TBC1D4
23KLF2 TNFRSF18 GZMM RPL7 JUNB MT-ND5 JUN MMP3 H2AFV LYST RPS5 LINC00861 RPL39 MYL12A RPL9 PKM LTB
24ZFP36L2 CTLA4 CX3CR1 RPL11 SLC38A2 IKZF1 ANXA1 MT-ND1 NUSAP1IGKC FOSB MT-CO3 LDHB TIMP1 LEF1 TYMP ARID5B
25YPEL5 IL6ST CTSC RPL10AJUND MT-CO2 EPSTI1 ANXA1 LGALS1SH2D1A SELL MTRNR2L8 RPS19 CD2 RPS3A IL32 BATF
In [70]:
p.cells.foxp3 <- FetchData(alt.Treg, vars= c("FOXP3", "seurat_clusters"), assay="RNA", slot = "data")
p.cells.foxp3 <- p.cells.foxp3%>% mutate(FOXP3.e = FOXP3 > 0) %>% group_by(seurat_clusters, FOXP3.e)%>% summarise(n = n())%>% ungroup()
ggplot(p.cells.foxp3, aes(x=seurat_clusters, y=n, fill = FOXP3.e)) + geom_bar(stat = "identity")
Error in FetchData(alt.Treg, vars = c("FOXP3", "seurat_clusters"), assay = "RNA", : object 'alt.Treg' not found
Traceback:

1. FetchData(alt.Treg, vars = c("FOXP3", "seurat_clusters"), assay = "RNA", 
 .     slot = "data")
In [124]:
fig.size(6,6)
FeaturePlot(alt.Treg, reduction = red.use, features = "FOXP3")
In [125]:
alt.Treg$cluster[alt.Treg$seurat_clusters %in% c(7,9)]%>%  table
.
     T-0: CD4+ IL7R+ memory     T-1: CD4+ CD161+ memory 
                        120                          53 
     T-10: CD4+ OX40+NR3C1+    T-11: CD4+ CD146+ memory 
                        524                         129 
           T-12: CD4+ GNLY+ T-2: CD4+ IL7R+CCR5+ memory 
                          3                          43 
          T-3: CD4+ Tfh/Tph             T-4: CD4+ naive 
                        231                         358 
     T-5: CD4+ GZMK+ memory            T-6: CD4+ memory 
                         34                         179 
              T-7: CD4+ Tph    T-8: CD4+ CD25-high Treg 
                         43                        2208 
    T-9: CD4+ CD25-low Treg 
                       1950 

2.2.2. Cluster correlation with AMP annotations¶

In [126]:
#Psudobulk and correlation
amp.clusters <- FetchData(object = alt.Treg, 
                          vars = c(alt.Treg@assays$RNA@var.features, "cluster"), 
                          slot = "data", assay = "RNA")
amp.clusters <- amp.clusters %>% group_by(cluster) %>% summarise_all("mean")

# @assays$RNA@data%>% t()%>% data.frame() %>% mutate(cluster = alt.Treg$cluster)%>%
# group_by(cluster)%>% summarise_all("mean")
# amp.c.names <- amp.clusters$cluster
In [127]:
new.clusters <- FetchData(object = alt.Treg, 
                          vars = c(alt.Treg@assays$RNA@var.features, "seurat_clusters"), 
                          slot = "data", assay = "RNA")
new.clusters <- new.clusters %>% group_by(seurat_clusters) %>% summarise_all("mean")


# new.clusters <- alt.Treg@assays$RNA@data%>% t()%>% data.frame() %>% mutate(s.cluster = alt.Treg$seurat_clusters)%>%
# group_by(s.cluster)%>% summarise_all("mean")
# new.c.names <- new.clusters$s.cluster
In [128]:
amp.clusters <- data.frame(amp.clusters)
row.names(amp.clusters) <- amp.clusters$cluster
amp.clusters$cluster <- NULL
new.clusters <- data.frame(new.clusters)
row.names(new.clusters) <- new.clusters$seurat_clusters
new.clusters$seurat_clusters <- NULL
In [129]:
cluster.cor <- cor(t(scale(amp.clusters)), t(scale(new.clusters)))
cluster.cor <- data.frame(cluster.cor)
In [130]:
# library(pheatmap)
colorBreaks_cor = seq(-0.8,0.8,length=1000)
palette_cor <- colorRampPalette(c("#D4AC0D", "white", "#800080"))(n = length(colorBreaks_cor))

pheatmap(cluster.cor, scale = "none", 
         cluster_cols = F)
pheatmap(cluster.cor, scale = "none", breaks = colorBreaks_cor, color = palette_cor, 
         cluster_cols = T)

2.2.3. Subset Tregs - Keep only clusters that are similar to amp Treg by correlation¶

In [131]:
Treg.clusters <- c(7,9)
In [132]:
alt.Treg <- subset(alt.Treg, idents = Treg.clusters)
In [133]:
alt.Treg
An object of class Seurat 
33596 features across 5875 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 4 dimensional reductions calculated: pca, umap, harmony, humap
In [134]:
fig.size(5, 10)
FeaturePlot(alt.Treg, features = c("IL7R", "CD127/IL-7R-alpha-prot"), order = T)
VlnPlot(alt.Treg, features = c("IL7R", "CD127/IL-7R-alpha-prot"))
Warning message:
“Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
Warning message:
“Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
In [135]:
alt.Treg <- alt.Treg %>% 
            NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
            FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
            ScaleData()%>% 
            RunPCA(verbose = F)

alt.Treg <- NormalizeData(alt.Treg, normalization.method = "CLR", margin = 2, assay = "ADT")
alt.Treg <- ScaleData(alt.Treg, assay = "ADT")
Centering and scaling data matrix

Normalizing across cells

Centering and scaling data matrix

In [178]:
alt.Treg <- RunHarmony(alt.Treg, group.by.vars="donor", reduction = "pca", theta =0, plot_convergence = 2)


fig.size(5,5)

ElbowPlot(alt.Treg, ndims = 50, reduction = "harmony") 
ElbowPlot(alt.Treg, ndims = 50, reduction = "pca")
Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony 5/10

Harmony 6/10

Harmony 7/10

Harmony 8/10

Harmony 9/10

Harmony 10/10

Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
In [179]:
# dims_use = 1:20
# alt.Treg <- RunUMAP(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindNeighbors(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindClusters(object=alt.Treg, resolution=1, verbose=FALSE)

set.seed(1)
HU <- uwot::umap(alt.Treg@reductions$harmony@cell.embeddings, min_dist = 0.3,
                 spread = 0.8, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(alt.Treg)
alt.Treg[['humap']] <- Seurat::CreateDimReducObject(
    embeddings = HU$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(alt.Treg)
alt.Treg[['humap_fgraph']] <- HU_graph

alt.Treg <- FindClusters(alt.Treg, graph.name = 'humap_fgraph',
                                  resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 5875
Number of edges: 70378

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7496
Number of communities: 13
Elapsed time: 0 seconds
In [180]:
fig.size(6,6)

DimPlot(alt.Treg, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) + 
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [181]:
fig.size(5,5)

# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
red.use = "humap"
DimPlot(alt.Treg, reduction = red.use, group.by = "sex", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) 
fig.size(5,10)
DimPlot(alt.Treg, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20))
In [182]:
fig.size(5,20)
FeaturePlot(alt.Treg, reduction = red.use, features = c("FOXP3", "CXCL13", "CD8A", "AREG"), order = F, ncol=4)
FeaturePlot(alt.Treg, reduction = red.use, features = c("FOXP3", "CXCL13", "CD8A", "AREG"), order = T, ncol=4)


# fig.size(5,30)
# FeaturePlot(alt.Treg, features = c("FOXP3"), pt.size = 0.1, order = T, split.by = "seurat_clusters")
In [ ]:
fig.size(10,15)
FetchData(alt.Treg, vars = c("FOXP3","CD127/IL-7R-alpha-prot", "seurat_clusters"), assay = "RNA", slot = "data") %>%
rename("CD127.prot" = "adt_CD127/IL-7R-alpha-prot") -> pt.dat
pt.dat%>%  
    ggplot(aes(x=FOXP3, y = CD127.prot, col = seurat_clusters)) + geom_point() +facet_wrap(~seurat_clusters) +ylim(0,2)
In [184]:
table(alt.Treg$seurat_clusters)
  0   1   2   3   4   5   6   7   8   9  10  11  12 
895 880 790 729 701 625 476 241 178 129 102  93  36 
In [ ]:
fig.size(8,15)
library(ggridges)
pt.dat %>% 
ggplot(aes(x=CD127.prot, y = seurat_clusters)) + geom_density_ridges2(alpha = 0.5) + scale_fill_tableau("Tableau 20")
In [186]:
VlnPlot(alt.Treg, features = c("FOXP3"))
In [189]:
alt.Treg$seurat_clusters%>%  table
alt.Treg$cluster%>%  table
alt.Treg$cluster[alt.Treg$seurat_clusters %in% c(0,3,5,8)]%>%  table
.
  0   1   2   3   4   5   6   7   8   9  10  11  12 
895 880 790 729 701 625 476 241 178 129 102  93  36 
.
     T-0: CD4+ IL7R+ memory     T-1: CD4+ CD161+ memory 
                        120                          53 
     T-10: CD4+ OX40+NR3C1+    T-11: CD4+ CD146+ memory 
                        524                         129 
           T-12: CD4+ GNLY+ T-2: CD4+ IL7R+CCR5+ memory 
                          3                          43 
          T-3: CD4+ Tfh/Tph             T-4: CD4+ naive 
                        231                         358 
     T-5: CD4+ GZMK+ memory            T-6: CD4+ memory 
                         34                         179 
              T-7: CD4+ Tph    T-8: CD4+ CD25-high Treg 
                         43                        2208 
    T-9: CD4+ CD25-low Treg 
                       1950 
.
     T-0: CD4+ IL7R+ memory     T-1: CD4+ CD161+ memory 
                         58                          27 
     T-10: CD4+ OX40+NR3C1+    T-11: CD4+ CD146+ memory 
                        308                          67 
           T-12: CD4+ GNLY+ T-2: CD4+ IL7R+CCR5+ memory 
                          1                          18 
          T-3: CD4+ Tfh/Tph             T-4: CD4+ naive 
                        167                         239 
     T-5: CD4+ GZMK+ memory            T-6: CD4+ memory 
                         13                          67 
              T-7: CD4+ Tph    T-8: CD4+ CD25-high Treg 
                          5                         209 
    T-9: CD4+ CD25-low Treg 
                       1248 
In [190]:
Idents(alt.Treg) <- "seurat_clusters"
dge_wilxocon <- wilcoxauc(alt.Treg, "seurat_clusters")
In [191]:
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)

dge_wilxocon %>%
    group_by(group) %>%
    slice_max(n = 20, order_by = logFC)%>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) #%>% write.csv("AMP.reclustering.markers.csv")
A tibble: 20 × 14
row0110111223456789
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1JUNB IL32 KLF6 ISG15 PRG4 TNFRSF18FOS KLF2 NMB GIMAP7KLRB1 RPS4Y1 CCL5
2ZNF331 LGALS1 JUN IFIT2 FN1 TNFRSF4 KLF2 TNFAIP3 ZNF331IGKC IGKC KLF2 GZMK
3CD69 S100A4 TNFAIP3 PMAIP1 PLA2G2A CTSC RPS3A MYADM RNF19ARPS12 AC004585.1JUN GZMA
4NR4A2 RNF213 YPEL5 OASL CLU CTLA4 RPL32 TSC22D3 ITM2A RPS29 ALOX5AP IER2 NKG7
5YPEL5 HLA-DRB1AC020916.1ZC3HAV1CRTAC1 IL32 RPS20 JUN KLRB1 RPLP2 FKBP5 GIMAP7 CST7
6FOSB CD74 ZNF331 IFIT3 MGP LMNA EEF1B2IRF1 NR3C1 RPS21 CD7 HSP90AA1ANXA1
7CXCR4 S100A6 SAT1 MX1 HTRA1 DUSP4 RPS12 KLF6 JUNB RPL39 IGLC2 FOS JUN
8FOS CTSC FOSB RGS1 KLF6 PHLDA1 RPS13 DNAJB1 SRGN RPL13 PTPN7 GADD45B FOSB
9JUN CYTOR FOS IFIT1 TIMP1 GAPDH RPS6 FTH1 TWIST1PNISR SESN3 CD52 DUSP2
10BTG2 C15orf53JUND HERC5 CFD BATF RPS18 CREM RGS2 LUC7L3PTPRC DNAJB1 AHNAK
11ZFP36L2TNFRSF18VPS37B IFI6 COL3A1 CD74 RPL9 CD55 AREG RPL36 NR3C1 TXNIP CCL4
12AREG FLNA SLC2A3 ISG20 LUM TYMP RPLP2 LMNA MAGEH1GIMAP4TNFRSF4 HSPA1B TNFAIP3
13KLF2 S100A10 NR4A2 LY6E COL1A2 ENO1 RPL13 RGS1 ICA1 RPS18 ARGLU1 BTG2 NR4A2
14UBE2S GBP5 LMNA SAT1 RGS1 SRGN RPL30 CD69 CD7 RPL32 CORO1B RIPOR2 ZFP36L2
15DNAJA1 EMP3 RGS1 SRSF3 DCN LGALS1 RPS8 DNAJA1 GEM TXNIP PNISR RPL36A CD69
16CYCS CYBA CREM DNAJA1 TIMP3 TNFRSF1BRPS5 AC058791.1IGFL2 MT-ND2IGHG3 RPL17 ID2
17BTG1 SYNE2 CXCR4 XAF1 BATF HLA-DRB1RPL3 TENT5C AHI1 RPL38 N4BP2L2 AES GZMM
18DUSP1 HLA-DPB1MT-CO1 GADD45BHLA-DRA S100A4 RPL22 DUSP1 MAL SELL IGHM RPS18 CREM
19DUSP2 NKTR JUNB EPSTI1 CD63 CD2 RPS14 CD52 DUSP4 RPS27 ITM2A GIMAP1 IRF1
20CCR7 HLA-DPA1CLK1 ID2 HLA-DPB1PKM RPL27AJUNB DUSP2 RPL34 ICA1 BIN2 HLA-DRB1
In [192]:
#Psudobulk 
pb <- FetchData(object = alt.Treg, 
                          vars = c("FOXP3", "IL2RA", "AREG", "IL7R", "CD127/IL-7R-alpha-prot", "seurat_clusters"), 
                          slot = "data", assay = "RNA")
pb <- pb %>% group_by(seurat_clusters) %>% summarise_all("mean") %>% rename("CD127.prot" = "adt_CD127/IL-7R-alpha-prot")
pb%>% ggplot(aes(x = FOXP3, y = IL7R, label = seurat_clusters, color = AREG)) + geom_point() +  geom_smooth(method = "lm", se = FALSE) + geom_label()
Warning message:
“Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
`geom_smooth()` using formula = 'y ~ x'
Warning message:
“The following aesthetics were dropped during statistical transformation: label,
colour
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?”

2.3 Subset alternative Tregs¶

In [193]:
# 12 - fibroblasts ?
# 9 - GZM signature CD8
# 0,3,5- low FOXP3
# 8 - low FOXP3 - male unique (mix donors)

alt.Treg <- subset(alt.Treg, idents = c(0,3,5,8,9,12), invert = T)
In [194]:
alt.Treg
An object of class Seurat 
33596 features across 3283 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 4 dimensional reductions calculated: pca, umap, harmony, humap
In [195]:
alt.Treg <- alt.Treg %>% 
            NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
            FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
            ScaleData()%>% 
            RunPCA(verbose = F)

alt.Treg <- NormalizeData(alt.Treg, normalization.method = "CLR", margin = 2, assay = "ADT")
alt.Treg <- ScaleData(alt.Treg, assay = "ADT")
Centering and scaling data matrix

Normalizing across cells

Centering and scaling data matrix

In [ ]:
alt.Treg <- RunHarmony(alt.Treg, group.by.vars="donor", reduction = "pca", theta = 0, plot_convergence = 2)

fig.size(5,5)

ElbowPlot(alt.Treg, ndims = 50, reduction = "harmony") 
ElbowPlot(alt.Treg, ndims = 50, reduction = "pca")
In [202]:
set.seed(1)
HU <- uwot::umap(alt.Treg@reductions$harmony@cell.embeddings, min_dist = 0.8,
                 spread = 1, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(alt.Treg)
alt.Treg[['humap']] <- Seurat::CreateDimReducObject(
    embeddings = HU$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(alt.Treg)
alt.Treg[['humap_fgraph']] <- HU_graph

alt.Treg <- FindClusters(alt.Treg, graph.name = 'humap_fgraph',
                                  resolution = 0.5, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 3283
Number of edges: 38619

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8208
Number of communities: 11
Elapsed time: 0 seconds
In [203]:
fig.size(6,6)

DimPlot(alt.Treg, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) + 
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [ ]:
fig.size(4,15)
FeaturePlot(alt.Treg, reduction = red.use, features = c("FOXP3", "AREG", "IL2RA"), order = F, ncol=3)
In [207]:
alt.Treg
An object of class Seurat 
33596 features across 3283 samples within 2 assays 
Active assay: RNA (33538 features, 2000 variable features)
 1 other assay present: ADT
 4 dimensional reductions calculated: pca, umap, harmony, humap
In [206]:
saveRDS(alt.Treg, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_alternative.rds")

2.3.1 check overlap with Treg from AMP¶

In [ ]:
gain.alt <- setdiff(colnames(alt.Treg), colnames(treg.amp))
loss.alt <- setdiff(colnames(treg.amp), colnames(alt.Treg))

length(gain.alt)
length(loss.alt)
In [ ]:
add2amp <- amp@meta.data %>% mutate(add2amp = if_else(condition = grepl("T-[89]", cluster), "overlap Treg", "other"))
add2amp[gain.alt, "add2amp"] <- "alternative.Treg.only"
add2amp[loss.alt, "add2amp"] <- "amp.Treg.only"
amp <- AddMetaData(amp, add2amp$add2amp, "Treg.classification")
In [ ]:
amp$Treg.classification <- factor(amp$Treg.classification, levels = c("other", "overlap Treg", "alternative.Treg.only", "amp.Treg.only"))
DimPlot(amp, group.by = "Treg.classification", order = T, cols = c("grey", "#C48FFE", "#22D396", "#E7A937"), pt.size = 0.5)
In [ ]:
fig.size(5,10)
DimPlot(amp, group.by = "Treg.classification", order = T, cols = c("grey", "#C48FFE", "#22D396", "#E7A937"), pt.size = 0.5, split.by = "Treg.classification")
In [ ]:
Treg.pop.DEG <- wilcoxauc(amp, "Treg.classification", groups_use = c("amp.Treg.only", "alternative.Treg.only"))


options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)

Treg.pop.DEG %>%
    group_by(group) %>%
    slice_max(n = 15, order_by = logFC)%>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature)
In [ ]:
Idents(amp) <- "Treg.classification"
DotPlot(amp, features = c("FOXP3", "IL2RA", "AREG", "IL7R", "CD127/IL-7R-alpha-prot"), scale =F)
In [ ]:
#Psudobulk and correlation
new.clusters <- FetchData(object = alt.Treg, 
                          vars = c(alt.Treg@assays$RNA@var.features, "seurat_clusters"), 
                          slot = "data", assay = "RNA")
new.clusters <- new.clusters %>% group_by(seurat_clusters) %>% summarise_all("mean")
In [ ]:
new.clusters <- data.frame(new.clusters)
row.names(new.clusters) <- new.clusters$seurat_clusters
new.clusters$seurat_clusters <- NULL
In [ ]:
cluster.cor <- cor(t(scale(amp.clusters)), t(scale(new.clusters)))
In [ ]:
cluster.cor <- data.frame(cluster.cor)
colorBreaks_cor = seq(-0.8,0.8,length=1000)
palette_cor <- colorRampPalette(c("#D4AC0D", "white", "#800080"))(n = length(colorBreaks_cor))

fig.size(5, 10)
pheatmap(cluster.cor, scale = "none", 
         cluster_cols = F)
pheatmap(cluster.cor, scale = "none", breaks = colorBreaks_cor, color = palette_cor, 
         cluster_cols = T)
In [ ]:

In [ ]:

In [ ]:

In [ ]:
alt.Treg <- FindSubCluster(alt.Treg, cluster = 3, resolution = 0.3, graph.name = "RNA_snn")
In [ ]:
DimPlot(alt.Treg, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
theme(text = element_text(size = 20)) + scale_color_tableau(palette = "Tableau 20")
In [ ]:
VlnPlot(alt.Treg, features = c("FOXP3"), group.by = "sub.cluster") + scale_fill_tableau(palette = "Tableau 20")
In [ ]:
alt.Treg@meta.data %>% filter(seurat_clusters %in% c(3,5,7,14)) %>% summarise(n())